library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the 2015 historical Iranian earthquake records and draw an
# interactive 3D scatter of epicentres (marker size tracks magnitude).
hist <- readRDS("historical_web_data_26112015.rds")

plot_ly(
  x = hist$Longitude,
  y = hist$Latitude,
  z = hist$Depth,
  size = hist$Magnitude
) %>%
  add_markers() %>%
  layout(
    scene = list(
      xaxis = list(title = "Longitude"),
      yaxis = list(title = "Latitude"),
      zaxis = list(title = "Depth")
    )
  )
library(readr)
library(mapdata)
library(ggplot2)
library(gganimate)
library(ggmap)
# Read the significant-disaster table (tab-separated) and keep
# tsunami-generating earthquakes with primary magnitude above 7.
disaster <- read_delim("disaster.txt", "\t", escape_double = FALSE, trim_ws = TRUE)

# filter() drops rows whose condition evaluates to NA, so the original's
# separate !is.na(EQ_PRIMARY) step was redundant and is folded in here.
tsunami <- disaster %>%
  filter(FLAG_TSUNAMI == "Tsu", EQ_PRIMARY > 7)

world_map <- map_data("world")

# Animated world map: epicentres appear in order of magnitude
# (transition_time over EQ_PRIMARY), title shows the current frame value.
ggplot() +
  geom_polygon(data = world_map, aes(x = long, y = lat, group = group)) +
  coord_fixed(1.2) +
  geom_point(data = tsunami, aes(x = LONGITUDE, y = LATITUDE),
             color = "red", size = 0.5) +
  transition_time(EQ_PRIMARY) +
  ease_aes("linear") +
  labs(title = "MAGNITUDE {frame_time}")
# Overlay a 2D kernel-density estimate of earthquake epicentres on a
# pre-downloaded ggmap raster of the Tehran region.
iran_er <- readRDS("iran_earthquake.rds")
iran_map <- read_rds("Tehrn_map_6.rds")

ggmap(iran_map) +
  stat_density_2d(
    geom = "polygon",
    data = iran_er,
    aes(x = Long, y = Lat, fill = ..level..)
  )
## Warning: Removed 243 rows containing non-finite values (stat_density2d).
# Gaps (in years) between consecutive magnitude > 6.5 earthquakes in Iran.
iran_dis <- disaster %>% filter(COUNTRY == 'IRAN' & EQ_PRIMARY > 6.5)

# diff() yields successive differences directly; the original built the
# same vector by subtracting a shifted copy of YEAR and then renaming the
# auto-generated column.
diff_year <- data.frame(diff = diff(iran_dis$YEAR))

# Fraction of inter-event gaps that are 6 years or shorter.
x <- nrow(diff_year %>% filter(diff > 6))
y <- nrow(diff_year)
cat(1 - x / y)
# Recorded output: 0.5185185
library(rworldmap)
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
# Mean and total deaths per country, joined to the rworldmap country
# polygons and plotted as a choropleth of total deaths.
country_average <- disaster %>%
  group_by(COUNTRY) %>%
  summarise(
    avgDeath = mean(DEATHS, na.rm = TRUE),
    totalDeath = sum(DEATHS, na.rm = TRUE)
  ) %>%
  rename(country = COUNTRY)

mapDevice('x11')
# Recorded run: 138 country names matched, 16 failed, 105 map countries
# had no data.
heatMap <- joinCountryData2Map(country_average, joinCode = "NAME",
                               nameJoinColumn = "country")
mapCountryData(heatMap, nameColumnToPlot = "totalDeath")
# Linear model: deaths as a function of location, focal depth and primary
# magnitude (the recorded summary below shows a very weak fit, R^2 ~ 0.03).
model <- lm(DEATHS ~ LONGITUDE + LATITUDE + FOCAL_DEPTH + EQ_PRIMARY,
            data = disaster)
summary(model)
##
## Call:
## lm(formula = DEATHS ~ LONGITUDE + LATITUDE + FOCAL_DEPTH + EQ_PRIMARY,
## data = disaster)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12556 -3519 -1872 35 311710
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15451.303 3081.358 -5.014 6.13e-07 ***
## LONGITUDE -1.232 5.813 -0.212 0.83226
## LATITUDE 64.237 21.852 2.940 0.00335 **
## FOCAL_DEPTH -21.929 11.354 -1.931 0.05367 .
## EQ_PRIMARY 2678.740 464.175 5.771 1.01e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15440 on 1178 degrees of freedom
## (4843 observations deleted due to missingness)
## Multiple R-squared: 0.031, Adjusted R-squared: 0.0277
## F-statistic: 9.42 on 4 and 1178 DF, p-value: 1.7e-07
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(stringr)
# Load the worldwide USGS catalogue and derive calendar fields plus a
# country label: the text after the last ", " in `place`.
worldwide <- read.csv('worldwide.csv')
worldwide$date <- as_date(worldwide$time)
worldwide$day <- day(worldwide$date)
worldwide$year <- year(worldwide$date)
worldwide$month <- month(worldwide$date)
# vapply pins the result to character(1) per element; sapply's return type
# silently varies with its input. Extraction logic is unchanged: keep the
# last ", "-separated token of each place string.
worldwide$country <- vapply(worldwide$place, function(X) {
  tail(str_split(X, ", ")[[1]], 1)
}, character(1))
# Largest shock per (country, day, month, year); ties for the daily maximum
# are all retained by filter(mag == max(mag)).
biggest = worldwide %>% group_by(country, day, month, year) %>% filter(mag == max(mag))
# Second-largest shock per group.
# NOTE(review): rank() defaults to ties.method = "average", so two tied
# maxima get rank 1.5 each and such a group yields no rank == 2 row;
# dense_rank() would be tie-robust — confirm intent before changing.
secondBiggest = worldwide %>% group_by(country, day, month, year) %>%
mutate(rank = rank(desc(mag))) %>%
arrange(rank) %>% filter(rank == 2)
# Pair each group's biggest event (suffix .y) with its second biggest (.x).
joined = inner_join(secondBiggest, biggest, by = c('country', 'day', 'month', 'year'))
# Plain head/tail 80/20 split (no shuffling), then regress the biggest
# magnitude on the second biggest.
train = joined[1:as.integer(0.8 * nrow(joined)),]
test = joined[-(1:as.integer(0.8 * nrow(joined))),]
reg = lm(data = train, mag.y ~ mag.x)
summary(reg)
##
## Call:
## lm(formula = mag.y ~ mag.x, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39162 -0.28967 -0.09111 0.11033 2.60982
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.38649 0.02342 16.5 <2e-16 ***
## mag.x 1.00103 0.00583 171.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3756 on 6926 degrees of freedom
## Multiple R-squared: 0.8098, Adjusted R-squared: 0.8097
## F-statistic: 2.948e+04 on 1 and 6926 DF, p-value: < 2.2e-16
# Distribution of out-of-sample residuals: predicted minus observed
# biggest magnitude on the held-out 20%.
(stats::predict(reg, test) - test$mag.y) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.508789 -0.109405 0.091314 0.006798 0.290287 0.391211
چون مقدار همبستگی (کورلیشن) در آزمون کم است، رابطهٔ خطی میان این دو متغیر ضعیف است و نمی‌توان آن‌ها را به هم وابسته دانست.
# Pearson correlation between focal depth and magnitude.
# cor.test() accepts exactly one method — it runs match.arg() on the
# `method` argument, so the original vector c("pearson", "kendall",
# "spearman") silently collapsed to "pearson" (consistent with the echoed
# output below). Make the single method explicit.
cor.test(worldwide$depth, worldwide$mag, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: worldwide$depth and worldwide$mag
## t = 50.03, df = 60629, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1914584 0.2067469
## sample estimates:
## cor
## 0.1991147
library(lubridate)

# Mean yearly number of recorded quakes per place; bar chart of the ten
# places with the highest average.
worldwide <- read.csv('worldwide.csv') %>%
  mutate(
    place = sub(".*, ", "", place),
    time = as.Date(time),
    year = year(time)
  ) %>%
  group_by(place, year) %>%
  summarise(count = n()) %>%
  group_by(place) %>%
  summarise(cnt = mean(count))

ww <- worldwide %>%
  arrange(desc(cnt)) %>%
  top_n(10)

ggplot(ww) +
  geom_bar(aes(reorder(place, cnt), cnt), stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
در قسمت اول بیشترین تعداد زلزله‌های بالای ۳ ریشتر در استان‌های کشور به دست آمده است. در قسمت دوم تعداد زلزله‌های بالای ۶ ریشتر از سال ۲۰۰۶ به بعد در ایران نشان داده شده است. در قسمت سوم بیشترین تعداد زلزله‌ها با بزرگی بیشتر از ۸ ریشتر به تفکیک کشور نشان داده شده است.
# Top ten Iranian provinces by number of magnitude > 3 quakes in the
# 2015 historical data set.
iran2015 <- hist %>%
  filter(Magnitude > 3) %>%
  group_by(Province) %>%
  summarise(cnt = n()) %>%
  arrange(desc(cnt)) %>%
  top_n(10)

ggplot(iran2015) +
  geom_bar(aes(x = reorder(Province, cnt), y = cnt), stat = "identity")
# Number of magnitude > 6 events in the Iran catalogue (13 in the recorded
# run). Summing the logical mask with na.rm = TRUE counts exactly the rows
# the original filter(Mag > 6) %>% n() kept.
iran_er %>% summarise(cnt = sum(Mag > 6, na.rm = TRUE))
## cnt
## 1 13
# Countries with the most magnitude > 8 earthquakes, top ten, as a
# count-ordered bar chart.
bigearth <- disaster %>%
  filter(EQ_PRIMARY > 8) %>%
  group_by(COUNTRY) %>%
  summarise(cnt = n()) %>%
  arrange(desc(cnt)) %>%
  top_n(10)

ggplot(bigearth) +
  geom_bar(aes(x = reorder(COUNTRY, cnt), y = cnt), stat = "identity")